This short Markdown file shows how to query the WsprDaemon Timescale Database using R and SQL.
library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
dt_options <- list(pageLength=7, scrollX = "400px")
The settings are available over here.
db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"
db_user <- "wdread"
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
dbname = db_name,
host = db_host,
user = db_user,
password = db_password)
Check if it worked:
dbListTables(db_con)
## [1] "spots"
Yes!
Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):
res <- dbSendQuery(db_con,
"SELECT * FROM spots
ORDER BY wd_time DESC
LIMIT 20;")
dbFetch(res) %>%
datatable(options = dt_options)
Yes – works.
dbClearResult(res)
Let’s try one day’s worth of data – reception reports of M0INF. There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign use single quotes.
one_day <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE \"CallSign\" = 'M0INF'
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
This fetches the data to a data frame:
the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
datatable(options = dt_options)
Now plot:
the_dat %>%
ggplot(aes(wd_time, distance)) +
geom_point() +
geom_smooth(method = "gam",
formula = y ~ s(x, k = 20)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports of M0INF",
subtitle = "3 Jan 2021, 20m band") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
Let’s do the same again; however, now with reception reports both by and of M0INF.
one_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
The graph is a bit crowded so the y-axis is on the \(\log_{10}\) scale.
the_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
Which of those reports are over 5000 km?
the_dat_send_rec %>%
filter(distance > 5000) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
| CallSign | Reporter | distance | Spots |
|---|---|---|---|
| VK3MO | M0INF | 16880 | 7 |
| M0INF | KD2OM | 5635 | 2 |
| M0INF | PT2FHC | 8786 | 1 |
And which are under 100 km?
the_dat_send_rec %>%
filter(distance < 100) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
| CallSign | Reporter | distance | Spots |
|---|---|---|---|
| M0INF | GB0SNB/SDR | 32 | 58 |
two_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
day_40m_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
day_40m_send_receive_dat %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "5 Jan 2021, 40m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
First, look at data where there’s more than one report.
dat_sent_many <- the_dat %>%
group_by(Reporter) %>%
summarise(spots = n()) %>%
arrange(desc(spots)) %>%
slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many
## # A tibble: 7 x 2
## Reporter spots
## <chr> <int>
## 1 GB0SNB/SDR 58
## 2 IW2NKE 22
## 3 YU/G8ZMF 10
## 4 SA2KHG 8
## 5 OE1WYC 7
## 6 IZ7VHF/RX 6
## 7 UA3245SWL 6
the_dat %>%
mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
ggplot(aes(wd_time, dB, colour = ReportDist)) +
geom_smooth(span = 1, se = FALSE) +
labs(x = "Time", y = "Signal report (dB)",
title = "WSPR reports (20m) – signal strength",
colour = "Reporter") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Here are the WSPR frequencies, for labeling the plot in a moment.
wspr_bands <- tibble(
MHz = c(
0.136,
0.4742,
1.8366,
3.5686,
5.2872,
5364.7,
7.0386,
10.1387,
14.0956,
18.1046,
21.0946,
24.9246,
28.1246,
50.293,
70.091,
144.489,
432.300,
1296.500
),
m = 299.792458 / MHz,
wavelength_text = ifelse(m >= 1,
paste0(round(m, 0), " m"),
paste0(round(
m * 100, 0
), " cm"))
) %>%
arrange(MHz)
wspr_bands %>% kable()
| MHz | m | wavelength_text |
|---|---|---|
| 0.1360 | 2204.3563088 | 2204 m |
| 0.4742 | 632.2067862 | 632 m |
| 1.8366 | 163.2323086 | 163 m |
| 3.5686 | 84.0084229 | 84 m |
| 5.2872 | 56.7015543 | 57 m |
| 7.0386 | 42.5926261 | 43 m |
| 10.1387 | 29.5691221 | 30 m |
| 14.0956 | 21.2685134 | 21 m |
| 18.1046 | 16.5589109 | 17 m |
| 21.0946 | 14.2118105 | 14 m |
| 24.9246 | 12.0279747 | 12 m |
| 28.1246 | 10.6594390 | 11 m |
| 50.2930 | 5.9609182 | 6 m |
| 70.0910 | 4.2771891 | 4 m |
| 144.4890 | 2.0748462 | 2 m |
| 432.3000 | 0.6934824 | 69 cm |
| 1296.5000 | 0.2312321 | 23 cm |
| 5364.7000 | 0.0558824 | 6 cm |
Grab the data:
last_whispers <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"ReporterGrid\" LIKE 'IO91%'
OR \"Grid\" LIKE 'IO91%')
AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 2927
Group into WSPR bands:
joined_dat <- the_dat %>%
difference_left_join(wspr_bands,
by = "MHz",
max_dist = 0.2,
distance_col = "dist_band_start") %>%
mutate(MHz.y = as.factor(MHz.y))
Check how many kHz off the match is (also look for NAs):
summary(joined_dat$dist_band_start*1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.377 1.451 1.501 2.100 1.535 79.088
Plot:
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = MHz.y)) +
geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time))) +
theme(legend.position = "none")
Try again, but this time separating the transmisions and receptions in IO91.
joined_dat <- joined_dat %>%
mutate(direction = ifelse(grepl(
pattern = "IO91",
x = Grid,
ignore.case = TRUE
), "Sent from IO91", "Received in IO91"))
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = direction)) +
geom_point(position = position_jitterdodge(jitter.width = .1,
jitter.height = 0,
dodge.width = 0.4),
alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Direction")
Let’s do the same again but also showing the direction, for transmissions from IO91 only.
tx_only <- joined_dat %>%
filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
ggplot(aes(azimuth, distance, colour = MHz.y)) +
geom_point(alpha = 0.5) +
labs(
title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Freq (MHz)",
x = NULL,
y = "Distance"
) +
scale_x_continuous(
limits = c(0, 360),
breaks = seq(0, 315, by = 45),
minor_breaks = seq(0, 360, by = 15)
) +
coord_polar()
linear_scale_polar
linear_scale_polar +
scale_y_continuous(trans = "log10")
tx_only %>%
filter(distance > 8000) %>%
select(CallSign, Reporter, distance, azimuth, MHz.x)
## CallSign Reporter distance azimuth MHz.x
## 1 M0MBO VK7JJ 17294 77 7.040102
## 2 M0MBO BD7MLA 9535 60 7.040100
## 3 M0MBO VK4CT 16470 49 7.040102
## 4 M0MBO VK3KHZ 16939 73 7.040104
## 5 M0MBO BR7IAF/1 9432 59 7.040054
## 6 G0TLK VK7JJ 17273 79 7.040096
## 7 G0TLK VK4CT 16486 51 7.040096
## 8 G0TLK DU6/PE1NSQ 11218 57 7.040098
## 9 M0KYR KFS 8649 317 10.140200
## 10 G0TLK BD7MLA 9538 60 7.040094
## 11 M0MBO BD7MLA 9535 60 7.040098
## 12 M0MBO BR7IAF/1 9432 59 7.040054
## 13 G0TLK VK7JJ 17273 79 7.040096
## 14 G0TLK VK4CT 16486 51 7.040095
## 15 G0TLK DU6/PE1NSQ 11218 57 7.040098
## 16 G0TLK BR7IAF/1 9436 59 7.040048
## 17 G0TLK VK7JJ 17273 79 7.040095
## 18 M0MBO VK7JJ 17294 77 7.040100
## 19 G4LRP PT2FHC 8731 226 14.097059
## 20 M0KRI VK7JJ 17273 79 7.040151
## 21 M0MBO VK7JJ 17294 77 7.040101
## 22 M0KRI VK7JJ 17273 79 7.040151